{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "sys.path.append(\"..\")\n",
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.array([1, 2, 3])\n",
    "print(x.__class__)\n",
    "print(x.shape)\n",
    "print(x.ndim)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(1, 10):\n",
    "    for j in range(1, 10):\n",
    "        print(f\"n = {i}, m = {j}, shape = {np.zeros((i, j)).shape}, dim = {np.zeros((i, j)).ndim}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "W = np.array([[1, 2, 3], [4, 5, 6]])\n",
    "X = np.array([[0, 1, 2], [3, 4, 5]])\n",
    "print(W + X)\n",
    "print(W * X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = np.array([1, 2, 3])\n",
    "b = np.array([4, 5, 6])\n",
    "np.dot(a, b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "A = np.array([[1, 2], [3, 4]])\n",
    "B = np.array([[5, 6], [7, 8]])\n",
    "np.dot(A, B)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.random.randn(10, 2)\n",
    "print(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "W1 = np.random.randn(2, 4)\n",
    "b1 = np.random.randn(4)\n",
    "h = np.dot(x, W1) + b1\n",
    "print(h)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def sigmoid(x):\n",
    "    return 1 / (1 + np.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "a = sigmoid(h)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "W2 = np.random.randn(4, 3)\n",
    "b2 = np.random.randn(3)\n",
    "s = np.dot(a, W2) + b2\n",
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Sigmoid:\n",
    "    def __init__(self):\n",
    "        self.params = []\n",
    "\n",
    "    def forward(self, x):\n",
    "        return 1 / (1 + np.exp(-x))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Affine:\n",
    "    def __init__(self, W, b):\n",
    "        self.params = [W, b]\n",
    "\n",
    "    def forward(self, x):\n",
    "        W, b = self.params\n",
    "        out = np.dot(x, W) + b\n",
    "        return out"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class TwoLayerNet:\n",
    "    def __init__(self, input_size, hidden_size, output_size):\n",
    "        I, H, O = input_size, hidden_size, output_size\n",
    "        \n",
    "        # Initialization of Weight and Bias\n",
    "        W1 = np.random.randn(I, H)\n",
    "        b1 = np.random.randn(H)\n",
    "        W2 = np.random.randn(H, O)\n",
    "        b2 = np.random.randn(O)\n",
    "        \n",
    "        # Create Layers\n",
    "        self.layers = [\n",
    "            Affine(W1, b1),\n",
    "            Sigmoid(),\n",
    "            Affine(W2, b2)\n",
    "        ]\n",
    "        \n",
    "        # Substitute All Weight for \"params\"\n",
    "        self.params = []\n",
    "        for layer in self.layers:\n",
    "            self.params += layer.params\n",
    "        \n",
    "    def predict(self, x):\n",
    "        for layer in self.layers:\n",
    "            x = layer.forward(x)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x = np.random.randn(10, 2)\n",
    "model = TwoLayerNet(2, 4, 3)\n",
    "s = model.predict(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(s)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "D, N = 8, 7\n",
    "x = np.random.randn(1, D)\n",
    "y = np.repeat(x, N, axis=0)\n",
    "print(y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dy = np.random.randn(N, D)\n",
    "dx = np.sum(dy, axis=0, keepdims=True)\n",
    "print(dx)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MatMul:\n",
    "    def __init__(self, W):\n",
    "        self.params = [W]\n",
    "        self.grads = [np.zeros_like(W)]\n",
    "        self.x = None\n",
    "    \n",
    "    def forward(self, x):\n",
    "        W, = self.params\n",
    "        out = np.dot(x, W)\n",
    "        self.x = x\n",
    "        return out\n",
    "    \n",
    "    def backward(self, dout):\n",
    "        W, = self.params\n",
    "        dx = np.dot(dout, W.T)\n",
    "        sW = np.dot(self.x.T, dout)\n",
    "        self.grads[0][...] = dW\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Sigmoid:\n",
    "    def __init__(self):\n",
    "        self.params, self.grads = [], []\n",
    "        self.out = None\n",
    "    \n",
    "    def forward(self, x):\n",
    "        out = 1 / (1 + np.exp(-x))\n",
    "        self.out = out\n",
    "        return out\n",
    "    \n",
    "    def backward(self, dout):\n",
    "        dx = dout * (1.0 - self.out) * self.out\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Affine:\n",
    "    def __init__(self, W, b):\n",
    "        self.params = [W, b]\n",
    "        self.grads = [np.zeros_like(W, np.zeros_like(b))]\n",
    "        self.x = None\n",
    "    \n",
    "    def forward(self, x):\n",
    "        W, b = self.params\n",
    "        out = np.dot(x, W) + b\n",
    "        self.x = x\n",
    "        return out\n",
    "    \n",
    "    def backward(self, dout):\n",
    "        W, b = self.params\n",
    "        dx = np.dot(dout, W.T)\n",
    "        dW = np.dot(self.x.T, dout)\n",
    "        db = np.sum(dout, axis=0)\n",
    "        \n",
    "        self.grads[0][...] = dW\n",
    "        self.grads[1][...] = db\n",
    "        return dx"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class SGD:\n",
    "    def __init__(self, lr=0.01):\n",
    "        self.lr = lr\n",
    "    \n",
    "    def update(self, params, grads):\n",
    "        for i in range(len(params)):\n",
    "            params[i] -= self.lr * grads[i]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\"\"\"\n",
    "model = TwoLayerNet(...)\n",
    "optimizer = SGD()\n",
    "\n",
    "for i in range(10000):\n",
    "    ...\n",
    "    x_batch, t_batch = get_mini_batch(...)\n",
    "    loss = model.forward(x_batch, t_batch)\n",
    "    model.backward()\n",
    "    optimizer.update(model.params, model.grads)\n",
    "    ...\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from dataset import spiral"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "x, t = spiral.load_data()\n",
    "print(\"x\", x.shape)\n",
    "print(\"t\", t.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(3):\n",
    "    plt.scatter(x[:, 0][t[:, i] == 1], x[:, 1][t[:, i] == 1], label=str(i), marker=(\"o\", \"x\", \"^\")[i])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from common.layers import Affine, Sigmoid, SoftmaxWithLoss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class TwoLayerNet:\n",
    "    def __init__(self, input_size, hidden_size, output_size):\n",
    "        I, H, O = input_size, hidden_size, output_size\n",
    "        \n",
    "        # initialization of weights and bias\n",
    "        W1 = 0.01 * np.random.randn(I, H)\n",
    "        b1 = np.zeros(H)\n",
    "        W2 = 0.01 * np.random.randn(H, O)\n",
    "        b2 = np.zeros(O)\n",
    "        \n",
    "        # create layers\n",
    "        self.layers = [\n",
    "            Affine(W1, b1),\n",
    "            Sigmoid(),\n",
    "            Affine(W2, b2)\n",
    "        ]\n",
    "        self.loss_layer = SoftmaxWithLoss()\n",
    "        \n",
    "        # gather all weights and gradients\n",
    "        self.params, self.grads = [], []\n",
    "        for layer in self.layers:\n",
    "            for layer in self.layers:\n",
    "                self.params += layer.params\n",
    "                self.grads += layer.grads\n",
    "                \n",
    "    def predict(self, x):\n",
    "        for layer in self.layers:\n",
    "            x = layer.forward(x)\n",
    "        return x\n",
    "    \n",
    "    def forward(self, x, t):\n",
    "        score = self.predict(x)\n",
    "        loss = self.loss_layer.forward(score, t)\n",
    "        return loss\n",
    "    \n",
    "    def backward(self, dout=1):\n",
    "        dout = self.loss_layer.backward(dout)\n",
    "        for layer in reversed(self.layers):\n",
    "            dout = layer.backward(dout)\n",
    "        return dout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from common.optimizer import SGD\n",
    "from two_layer_net import TwoLayerNet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# setting of hyperparameters\n",
    "max_epoch = 300\n",
    "batch_size = 30\n",
    "hidden_size = 10\n",
    "learning_rate = 1.0\n",
    "\n",
    "# data loading, create model and optimizer\n",
    "x, t = spiral.load_data()\n",
    "model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)\n",
    "optimizer = SGD(lr=learning_rate)\n",
    "\n",
    "# variables for learning phase\n",
    "data_size = len(x)\n",
    "max_iters = data_size // batch_size\n",
    "total_loss = 0\n",
    "loss_count = 0\n",
    "loss_list = []\n",
    "\n",
    "for epoch in range(max_epoch):\n",
    "    # data shuffling\n",
    "    idx = np.random.permutation(data_size)\n",
    "    x = x[idx]\n",
    "    t = t[idx]\n",
    "    \n",
    "    for iters in range(max_iters):\n",
    "        batch_x = x[iters * batch_size:(iters + 1) * batch_size]\n",
    "        batch_t = t[iters * batch_size:(iters + 1) * batch_size]\n",
    "        \n",
    "        # compute gradients and update parameters\n",
    "        loss = model.forward(batch_x, batch_t)\n",
    "        model.backward()\n",
    "        optimizer.update(model.params, model.grads)\n",
    "        \n",
    "        total_loss += loss\n",
    "        loss_count += 1\n",
    "        \n",
    "        # output learning progress regularly\n",
    "        if (iters + 1) % 10 == 0:\n",
    "            avg_loss = total_loss / loss_count\n",
    "            print(f\"| epoch {epoch + 1} | iter {iters + 1} / {max_iters} | loss {avg_loss:.2f}\")\n",
    "            loss_list.append(avg_loss)\n",
    "            total_loss, loss_count = 0, 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.plot(range(len(loss_list)), loss_list)\n",
    "plt.xlabel(\"iterations (x10)\")\n",
    "plt.ylabel(\"loss\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from common.trainer import Trainer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_epoch = 300\n",
    "batch_size = 30\n",
    "hidden_size = 10\n",
    "learning_rate = 1.0\n",
    "\n",
    "x, t = spiral.load_data()\n",
    "model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)\n",
    "optimizer = SGD(lr=learning_rate)\n",
    "\n",
    "trainer = Trainer(model, optimizer)\n",
    "trainer.fit(x, t, max_epoch, batch_size, eval_interval=10)\n",
    "trainer.plot()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "b = np.random.randn(3).astype(np.float32)\n",
    "b.dtype"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "c = np.random.randn(3).astype('f')\n",
    "c.dtype"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
